{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from __future__ import unicode_literals\n",
    "import os\n",
    "import glob\n",
    "import spacy\n",
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "file_dir = '/home/irteam/users/data/150kJavaScript/'\n",
    "nlp = spacy.load('en')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "with open(os.path.join(file_dir,'programs_training.txt')) as f:\n",
    "    files=f.readlines()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] No such file or directory: '/home/irteam/users/data/150kJavaScript/data/fullscale/pypes/ui/pypesvds/public/js/wireit/LayerMap-beta.js'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-11-8b1fabf12fbd>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_dir\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfiles\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m30\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m     \u001b[0mlines\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlines\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/home/irteam/users/data/150kJavaScript/data/fullscale/pypes/ui/pypesvds/public/js/wireit/LayerMap-beta.js'"
     ]
    }
   ],
   "source": [
    "with open(os.path.join(file_dir,files[30].strip())) as f:\n",
    "    lines = f.read()\n",
    "    print(lines)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "C\n",
      "K\n",
      "E\n",
      "D\n",
      "I\n",
      "T\n",
      "O\n",
      "R\n",
      ".\n",
      "p\n",
      "l\n",
      "u\n",
      "g\n",
      "i\n",
      "n\n",
      "s\n",
      ".\n",
      "s\n",
      "e\n",
      "t\n",
      "L\n",
      "a\n",
      "n\n",
      "g\n",
      "(\n",
      "\"\n",
      "s\n",
      "h\n",
      "o\n",
      "w\n",
      "b\n",
      "l\n",
      "o\n",
      "c\n",
      "k\n",
      "s\n",
      "\"\n",
      ",\n",
      "\"\n",
      "e\n",
      "n\n",
      "\"\n",
      ",\n",
      "{\n",
      "t\n",
      "o\n",
      "o\n",
      "l\n",
      "b\n",
      "a\n",
      "r\n",
      ":\n",
      "\"\n",
      "S\n",
      "h\n",
      "o\n",
      "w\n",
      "\n",
      "B\n",
      "l\n",
      "o\n",
      "c\n",
      "k\n",
      "s\n",
      "\"\n",
      "}\n",
      ")\n",
      ";\n"
     ]
    }
   ],
   "source": [
    "for line in lines:\n",
    "    line = line.strip()\n",
    "    print(nlp(line))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "Argument 'string' has incorrect type (expected unicode, got str)",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-11-bac6da054da4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnlp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"tokenize this\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m/home1/irteam/users/mjchoi/anaconda3/envs/py2.7/lib/python2.7/site-packages/spacy/language.pyc\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, text, tag, parse, entity)\u001b[0m\n\u001b[1;32m    318\u001b[0m             \u001b[0;34m(\u001b[0m\u001b[0;34m'An'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'NN'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    319\u001b[0m         \"\"\"\n\u001b[0;32m--> 320\u001b[0;31m         \u001b[0mdoc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmake_doc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    321\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mentity\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mentity\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    322\u001b[0m             \u001b[0;31m# Add any of the entity labels already set, in case we don't have them.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home1/irteam/users/mjchoi/anaconda3/envs/py2.7/lib/python2.7/site-packages/spacy/language.pyc\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(text)\u001b[0m\n\u001b[1;32m    291\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmake_doc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0moverrides\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'create_make_doc'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    292\u001b[0m         \u001b[0;32melif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'make_doc'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 293\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmake_doc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mtext\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtokenizer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    294\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;34m'pipeline'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0moverrides\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    295\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpipeline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0moverrides\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'pipeline'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mTypeError\u001b[0m: Argument 'string' has incorrect type (expected unicode, got str)"
     ]
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "str"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "var dsfl = module(deof)\n",
    "this.type1 = toiewjrw09\n",
    "\n",
    "\n",
    "var abc = require(abc).module()\n",
    "\n",
    "var dcef = require(dcef).module()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:pytorch]",
   "language": "python",
   "name": "conda-env-pytorch-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
